	*******************************************************************
	*          	BUILD DATASET FOR JACKKNIFE REGRESSIONS   	    	  *
	*   Adds in data from QGIS on distance to border and segments     *
	*   Last edited: 4/7/2023 									      *
	*******************************************************************
* Close any unnamed log left open by an earlier run that aborted before reaching
* -log close-; without this, -log using- below stops with r(604) "log file already open".
* -capture- makes this a no-op when no log is open.
capture log close
log using "${CodePath}/log/jackknife_build_$S_DATE.log", text replace
*---------------------------------------------------- *
* 		   CONSTRUCT JACKKNIFE DATASET 				  *
*---------------------------------------------------- *
* Start from the hospital-year file; ${DataPath} and ${CodePath} must be set by the caller.
use "${DataPath}/derived/hospyear_0715.dta", clear

// average and jackknife audit rates among acute hospitals -- available in replication packet
// many hospital-year rows match one row per provider number (pn); the merge result is kept in _acutehospmerge
merge m:1 pn using "${DataPath}/audit_averages.dta", gen(_acutehospmerge) 

// Interact every 2011 audit/demand rate with a full set of fiscal-year indicators (2000-2018).
// Each new variable equals 100 x the rate in the named fiscal year and 0 in all other years.
// src_vars and out_stubs are parallel lists: word i of out_stubs is the name stem used for
// the interactions of word i of src_vars (two stems are shortened relative to the source name).
	local src_vars ///
		jk_RACregion_audit_in2011 ///
		jk_state_audit_in2011 ///
		jkhrr_RACregion_audit_in2011 ///
		jkhrr_state_audit_in2011 ///
		jkchn_state_audit_in2011 ///
		RACregion_audit_in2011 ///
		state_audit_in2011 ///
		audit_in2011 ///
		demandrate_in2011 ///
		demandafterauditrate_in2011 ///
		jk_RACregion_demand_in2011 ///
		jk_state_demand_in2011 ///
		jkhrr_state_demand_in2011 ///
		jkchn_state_demand_in2011 ///
		RACregion_demand_in2011 ///
		state_demand_in2011

	local out_stubs ///
		jk_RACregion_audit_in2011 ///
		jk_state_audit_in2011 ///
		jkhrr_RAC_audit_in2011 ///
		jkhrr_state_audit_in2011 ///
		jkchn_state_audit_in2011 ///
		RACregion_audit_in2011 ///
		state_audit_in2011 ///
		audit_in2011 ///
		demandrate_in2011 ///
		demandafteraudit_in2011 ///
		jk_RACregion_demand_in2011 ///
		jk_state_demand_in2011 ///
		jkhrr_state_demand_in2011 ///
		jkchn_state_demand_in2011 ///
		RACregion_demand_in2011 ///
		state_demand_in2011

	local n_rates : word count `src_vars'

	// year is the outer loop so variables are created year by year, in list order within a year
	forvalues yr = 2000/2018 {
		forvalues i = 1/`n_rates' {
			local src  : word `i' of `src_vars'
			local stub : word `i' of `out_stubs'
			generate `stub'_X`yr' = `src' * (fyear == `yr') * 100
		}
	}


	// Post-period interactions: each 2011 rate times an indicator for fyear >= 2011, scaled by 100.
	// The common multiplier is kept in one macro so every interaction is built the same way.
	// Note that a missing fyear satisfies fyear >= 2011 in Stata.
	local post_x100 "(fyear >= 2011) * 100"

	// ---- audit rates ----
	generate jk_RACregion_audit_in2011_Xpost = jk_RACregion_audit_in2011 * `post_x100'
	label variable jk_RACregion_audit_in2011_Xpost "jackknife (w/in region) audit rate in 2011 x post"

	generate RACregion_audit_in2011_Xpost = RACregion_audit_in2011 * `post_x100'
	label variable RACregion_audit_in2011_Xpost "RAC region audit rate in 2011 x post"

	generate jk_state_audit_in2011_Xpost = jk_state_audit_in2011 * `post_x100'
	label variable jk_state_audit_in2011_Xpost "jackknife (w/in state) audit rate in 2011 x post"

	generate state_audit_in2011_Xpost = state_audit_in2011 * `post_x100'
	label variable state_audit_in2011_Xpost "state audit rate in 2011 x post"

	generate jkhrr_state_audit_in2011_Xpost = jkhrr_state_audit_in2011 * `post_x100'
	label variable jkhrr_state_audit_in2011_Xpost "jackknife other HRR (w/in state) audit rate in 2011 x post"

	// source variable is jkhrr_RACregion_*; the interaction uses the shorter jkhrr_RAC_* stem
	generate jkhrr_RAC_audit_in2011_Xpost = jkhrr_RACregion_audit_in2011 * `post_x100'
	label variable jkhrr_RAC_audit_in2011_Xpost "jackknife other HRR (w/in RAC region) audit rate in 2011 x post"

	generate jkchn_state_audit_in2011_Xpost = jkchn_state_audit_in2011 * `post_x100'
	label variable jkchn_state_audit_in2011_Xpost "jackknife other chain (w/in state) audit rate in 2011 x post"

	generate audit_in2011_Xpost = audit_in2011 * `post_x100'
	label variable audit_in2011_Xpost "audit rate in 2011 x post"

	// ---- demand rates ----
	generate jk_RACregion_demand_in2011_Xpost = jk_RACregion_demand_in2011 * `post_x100'
	label variable jk_RACregion_demand_in2011_Xpost "jackknife (w/in region) demand rate in 2011 x post"

	generate RACregion_demand_in2011_Xpost = RACregion_demand_in2011 * `post_x100'
	label variable RACregion_demand_in2011_Xpost "RAC region demand rate in 2011 x post"

	generate jk_state_demand_in2011_Xpost = jk_state_demand_in2011 * `post_x100'
	label variable jk_state_demand_in2011_Xpost "jackknife (w/in state) demand rate in 2011 x post"

	generate state_demand_in2011_Xpost = state_demand_in2011 * `post_x100'
	label variable state_demand_in2011_Xpost "state demand rate in 2011 x post"

	generate jkhrr_state_demand_in2011_Xpost = jkhrr_state_demand_in2011 * `post_x100'
	label variable jkhrr_state_demand_in2011_Xpost "jackknife other HRR (w/in state) demand rate in 2011 x post"

	generate jkchn_state_demand_in2011_Xpost = jkchn_state_demand_in2011 * `post_x100'
	label variable jkchn_state_demand_in2011_Xpost "jackknife other chain (w/in state) demand rate in 2011 x post"

	// the hospital's own demand interaction is named demand_*, built from demandrate_in2011
	generate demand_in2011_Xpost = demandrate_in2011 * `post_x100'
	label variable demand_in2011_Xpost "demand rate in 2011 x post"


// Remove the fiscal-year-2010 interaction terms (presumably 2010 is the omitted
// reference year in the event-study regressions -- confirm against the analysis code).
// NOTE(review): the 2010 interactions jkhrr_RAC_audit_in2011_X2010,
// jkchn_state_demand_in2011_X2010 and demandafteraudit_in2011_X2010 are created by the
// year loop but are NOT in this list -- confirm whether leaving them in is intended.
drop ///
	jk_RACregion_demand_in2011_X2010 ///
	jk_state_demand_in2011_X2010 ///
	jkhrr_state_demand_in2011_X2010 ///
	RACregion_demand_in2011_X2010 ///
	state_demand_in2011_X2010 ///
	demandrate_in2011_X2010 ///
	jk_RACregion_audit_in2011_X2010 ///
	jk_state_audit_in2011_X2010 ///
	jkhrr_state_audit_in2011_X2010 ///
	jkchn_state_audit_in2011_X2010 ///
	RACregion_audit_in2011_X2010 ///
	state_audit_in2011_X2010 ///
	audit_in2011_X2010

// Rescale the base 2011 rates by 100, matching the x100 scaling of the interaction terms.
// This must run after the interactions are built, since those already multiply by 100.
// NOTE(review): jkhrr_RACregion_audit_in2011 and the *_demand_in2011 aggregates
// (jk_RACregion_, jk_state_, jkhrr_state_, jkchn_state_, RACregion_, state_) are not
// rescaled here -- confirm that is intended.
foreach rate of varlist ///
		audit_in2011 ///
		demandrate_in2011 ///
		demandafterauditrate_in2011 ///
		jk_RACregion_audit_in2011 ///
		jk_state_audit_in2011 ///
		jkhrr_state_audit_in2011 ///
		jkchn_state_audit_in2011 ///
		RACregion_audit_in2011 ///
		state_audit_in2011 {
	replace `rate' = `rate' * 100
}



// Distance from each hospital to the border, taken from the hubdist column of hosptoborder.csv.
// The CSV is read inside preserve/restore so the hospital-year data in memory is untouched,
// reduced to a pn-level lookup, and then merged back on pn.
preserve
	import delimited "${DataPath}/shapefiles/hosptoborder.csv", clear
	keep pn hubdist
	rename hubdist dist_to_border
	tempfile border_dist
	save `border_dist'
restore

// keep master-only and matched rows; lookup rows with no hospital-year record are discarded
merge m:1 pn using `border_dist', keep(master match) generate(_borderdistmerge)

// ASSIGN HOSPITALS TO BORDER SEGMENTS
	// the hospcoord_to_XXmilesegments_state.csv files are generated in QGIS 3.0 as follows:
		// - load shapefile of hospitals, state borders, and RAC borders
		// - cut the RAC border into segments of a given length (GRASS v.split)
		// - identify points where state borders intersect, then convert these points into lines (SAGA "convert points to line")
		// - then cut the segmented RAC border by these intersections ("Split with line")
		// - assign each smaller segment its own ID (toggle editing and then add an attribute = @row_num)
		// - then calculate distance from each hospital ("Distance to nearest hub")
		// - last generated: 1/19/2023

	// One pass per segment length: 50, 100 (main spec), and 150 miles.
	// All three CSVs are available in the replication packet.
	foreach miles in 50 100 150 {

		// default file name and segment-id column (used for 50 and 150 miles)
		local seg_csv "hospcoord_to_`miles'segments_state.csv"
		local seg_id  hubname

		// the 100-mile export has a different file name and id column
		if `miles' == 100 {
			local seg_csv "hospcoord_to_100milesegments_state2.csv"
			local seg_id  ns2_100_s
		}

		// build a pn-level lookup of the nearest segment without disturbing the data in memory
		preserve
			import delimited "${DataPath}/shapefiles/`seg_csv'", clear
			keep pn `seg_id'
			rename `seg_id' nearseg`miles'_state
			tempfile seg_lookup
			save `seg_lookup'
		restore

		// attach the segment id; unmatched lookup rows are discarded
		merge m:1 pn using `seg_lookup', generate(_nearseg`miles'merge) keep(master match)
	}



// QGIS creates some extra overlapping segments, so must generate new near segments that clean up the segments at corners of the RAC borders 
// this was done manually by checking against QGIS map
	// 50 mile segments: copy the raw segment id, then fold overlapping ids into one id each
	local seg nearseg50_state_2
	generate `seg' = nearseg50_state
	replace `seg' = 39  if `seg' == 38
	replace `seg' = 23  if `seg' == 22
	replace `seg' = 11  if `seg' == 12
	replace `seg' = 1   if inlist(`seg', 2, 86, 87, 88, 89)
	replace `seg' = 52  if `seg' == 53
	replace `seg' = 67  if `seg' == 66
	replace `seg' = 61  if `seg' == 62
	// order matters for the next two lines: 102 is moved to 101 first, and only then
	// is 103 moved into the vacated id 102 (so original 103 ends up as 102, not 101)
	replace `seg' = 101 if `seg' == 102
	replace `seg' = 102 if `seg' == 103
	replace `seg' = 130 if inlist(`seg', 131, 132, 133)


	// 100 mile segments: copy the raw segment id, then fold overlapping ids into one id each
	local seg nearseg100_state_2
	generate `seg' = nearseg100_state
	replace `seg' = 5  if inlist(`seg', 6, 7)
	replace `seg' = 0  if inlist(`seg', 30, 50, 51, 52, 53, 54)
	replace `seg' = 40 if `seg' == 39
	replace `seg' = 45 if inlist(`seg', 43, 44)
	replace `seg' = 64 if inlist(`seg', 66, 67)
	replace `seg' = 68 if inlist(`seg', 69, 74)
	// NOTE(review): the earlier version of this line also listed 77 as a source for 77
	// (a no-op) -- confirm no other id was meant
	replace `seg' = 77 if `seg' == 76

	// 150 mile segments -- in QGIS this split created a lot of duplicate/overlapping segments so have to fix manually by looking at shapefile
	// No target id below is later used as a source id, so each rule acts on original ids only.
	local seg nearseg150_state_2
	generate `seg' = nearseg150_state
	replace `seg' = 24  if `seg' == 42
	replace `seg' = 23  if `seg' == 43
	replace `seg' = 21  if `seg' == 45
	replace `seg' = 18  if `seg' == 46
	replace `seg' = 17  if `seg' == 49
	replace `seg' = 15  if `seg' == 51
	replace `seg' = 11  if `seg' == 55
	replace `seg' = 10  if `seg' == 56
	replace `seg' = 8   if `seg' == 60
	replace `seg' = 3   if inlist(`seg', 62, 64)
	replace `seg' = 78  if `seg' == 63
	replace `seg' = 80  if `seg' == 38
	replace `seg' = 81  if inlist(`seg', 36, 37)
	// NOTE(review): the earlier version of this line also listed 82 as a source for 82
	// (a no-op) -- confirm no other id was meant
	replace `seg' = 82  if `seg' == 35
	replace `seg' = 85  if inlist(`seg', 32, 33, 86)
	replace `seg' = 87  if `seg' == 31
	replace `seg' = 88  if `seg' == 30
	replace `seg' = 89  if `seg' == 29
	replace `seg' = 28  if `seg' == 90
	replace `seg' = 27  if `seg' == 91
	replace `seg' = 26  if `seg' == 92
	replace `seg' = 25  if `seg' == 93
	replace `seg' = 76  if `seg' == 66
	replace `seg' = 101 if inlist(`seg', 67, 97)
	replace `seg' = 100 if `seg' == 68
	replace `seg' = 70  if inlist(`seg', 69, 98)
	replace `seg' = 71  if inlist(`seg', 72, 96)
	replace `seg' = 94  if `seg' == 108
	replace `seg' = 73  if `seg' == 106
	replace `seg' = 74  if inlist(`seg', 116, 117)
	replace `seg' = 105 if inlist(`seg', 104, 118)
	replace `seg' = 115 if `seg' == 75
	replace `seg' = 103 if `seg' == 120
	replace `seg' = 119 if `seg' == 102
	replace `seg' = 110 if inlist(`seg', 111, 113)
	replace `seg' = 114 if inlist(`seg', 40, 41)

	// Create segment FEs: group ids for (raw segment x year), (cleaned segment x year),
	// and (cleaned segment x state), for each segment length
	foreach miles of numlist 50 100 150 {
		local base nearseg`miles'_state
		egen `base'_fyear   = group(`base' fyear)
		egen `base'_2_fyear = group(`base'_2 fyear)
		egen `base'_2_state = group(`base'_2 state)
	}
	


// Define samples
// Overall hospital sample: hospitals that have non-missing values in 2007-2014 of audit rate, hospital costs, MEDPAR
// sample_total starts at 1 for every row and is set to 0 for hospitals failing any check below.
// -distinct- and egen's nvals() are user-written (SSC: distinct, egenmore) and must be installed.
	distinct pn
	capture drop sample_total
	generate sample_total = 1

	// hospital-level span of observed fiscal years (all years in the file, not only 2007-2014)
	bysort pn: egen pn_min    = min(fyear)
	bysort pn: egen pn_max    = max(fyear)
	bysort pn: egen pn_nyears = nvals(fyear)

	// must appear by 2007, still appear in 2014, and have at least 8 distinct years
	replace sample_total = 0 if pn_min > 2007 | pn_max < 2014 | pn_nyears < 8

	distinct pn if sample_total == 1
	drop pn_min pn_max pn_nyears

	// Per-variable coverage: each variable must be non-missing in every year of its window.
	// The window is 2007-2014 for all variables except audit_in2015, which uses 2007-2015.
	foreach var of varlist audit_in2011 audit_in2012 audit_in2013 audit_in2014 audit_in2015 real_tot_net real_gcost_admin_net mean_los mean_pmt_amt tot_pmt_amt n_claims {

		// clear leftovers from any earlier run
		foreach sfx in min max ny {
			capture drop `var'_`sfx'
		}
		capture drop fyear_temp

		display "checking for: `var'"

		// last year of the window and the number of years it spans (8 or 9)
		local last_year 2014
		if "`var'" == "audit_in2015" {
			local last_year 2015
		}
		local n_years = `last_year' - 2007 + 1

		// fyear_temp holds the year only where the variable is observed inside the window
		generate fyear_temp = fyear if !missing(`var') & inrange(fyear, 2007, `last_year')
		bysort pn: egen `var'_min = min(fyear_temp)
		bysort pn: egen `var'_max = max(fyear_temp)
		bysort pn: egen `var'_ny  = nvals(fyear_temp)

		// a hospital never observed in the window has a missing minimum, and missing > 2007
		// is true in Stata, so it is excluded by the first condition
		replace sample_total = 0 if `var'_min > 2007 | `var'_max < `last_year' | `var'_ny < `n_years'

		distinct pn if sample_total == 1
		drop `var'_min `var'_max fyear_temp `var'_ny
	}


// NOTE(review): the input file is hospyear_0715 but the output is named hospyear_0716_jk --
// confirm the file name is what downstream scripts expect.
save "${DataPath}/derived/hospyear_0716_jk.dta", replace

log close